package com.zx.reader.impl;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import com.zx.reader.FileReader;
import com.zx.util.StringUtils;

/**
 * Reader for Microsoft Word documents.
 *
 * <p>Files whose path ends with {@code .doc} (case-insensitive) are parsed with
 * {@link HWPFDocument}; every other path is parsed as OOXML via
 * {@link XWPFDocument} / {@link XWPFWordExtractor}.
 *
 * <p>The reader keeps only the file path; each read opens the file, extracts
 * the text, and closes the stream again, so no resource stays open between calls.
 *
 * @Project: dataParse-core
 * @Title: WordFileReader
 * @Description: Word file reader
 * @author: zhangxue
 * @date: 2018-02-19 13:45:47
 * @company: alibaba
 * @Copyright: Copyright (c) 2015
 * @version v1.0
 */
public class WordFileReader implements FileReader {

	/** Suffix identifying the legacy binary Word format. */
	private static final String DOC_SUFFIX = ".doc";

	/** Matches any of the line terminators CRLF, CR or LF. */
	private static final String LINE_BREAK_REGEX = "\\r\\n|\\r|\\n";

	/** Path of the Word file to read. */
	private final String filePath;

	/**
	 * Creates a reader for the given file. The file is not opened here.
	 *
	 * @param filePath path of the {@code .doc} or {@code .docx} file
	 */
	public WordFileReader(String filePath) {
		this.filePath = filePath;
	}

	/**
	 * Returns the document text split into lines.
	 *
	 * <p>The text is split on {@code \r\n}, {@code \r} and {@code \n}: legacy
	 * {@code .doc} text uses {@code \r} as the paragraph mark, while the
	 * {@code .docx} extractor separates paragraphs with {@code \n}, so splitting
	 * on {@code \r} alone would return a whole {@code .docx} as one line.
	 *
	 * @return a fixed-size list of lines (backed by an array; elements cannot be
	 *         added or removed); contains a single empty string when the
	 *         document text is empty or could not be read
	 */
	public List<String> readLines() {
		String[] lines = getText().split(LINE_BREAK_REGEX);
		return Arrays.asList(lines);
	}

	/**
	 * Returns the text of the whole document.
	 *
	 * @return the extracted text, or an empty string if the file could not be
	 *         read or parsed (the failure is printed to standard error)
	 */
	public String getText() {
		File file = new File(this.filePath);
		if (isLegacyDoc()) {
			return this.readDoc(file);
		}
		return this.readDocx(file);
	}

	/**
	 * Tells whether the path ends with {@code .doc}, ignoring case.
	 *
	 * @return {@code true} for paths such as {@code a.doc} or {@code A.DOC};
	 *         {@code false} otherwise, including {@code .docx} paths
	 */
	private boolean isLegacyDoc() {
		int suffixLength = DOC_SUFFIX.length();
		// regionMatches returns false (instead of throwing) when the path is
		// shorter than the suffix and the offset becomes negative.
		return filePath.regionMatches(true, filePath.length() - suffixLength, DOC_SUFFIX, 0, suffixLength);
	}

	/**
	 * Extracts the text of a legacy binary {@code .doc} file.
	 *
	 * @param docFile the file to read
	 * @return the document text, or an empty string on any failure
	 */
	private String readDoc(File docFile) {
		String text = "";
		// try-with-resources closes the stream even when opening or parsing
		// fails, and never dereferences a stream that was not opened.
		try (FileInputStream in = new FileInputStream(docFile)) {
			HWPFDocument doc = new HWPFDocument(in);
			text = doc.getText().toString();
		} catch (Exception e) {
			// Best effort: report the problem and fall back to whatever was read.
			e.printStackTrace();
		}
		return text;
	}

	/**
	 * Extracts the text of an OOXML {@code .docx} file.
	 *
	 * @param docxFile the file to read
	 * @return the document text, or an empty string on any failure
	 */
	private String readDocx(File docxFile) {
		String text = "";
		try (FileInputStream in = new FileInputStream(docxFile)) {
			XWPFDocument xdoc = new XWPFDocument(in);
			XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc);
			text = extractor.getText();
		} catch (Exception e) {
			// Best effort: report the problem and fall back to whatever was read.
			e.printStackTrace();
		}
		return text;
	}

	/**
	 * Does nothing: this reader keeps no stream open between calls, so there is
	 * nothing to release. Safe to call any number of times.
	 *
	 * @throws IOException never thrown by this implementation
	 */
	@Override
	public void close() throws IOException {
		// Intentionally empty. The previous body called this.close(), which
		// recursed until the stack overflowed.
	}

	/**
	 * Returns the word count of the document text, as computed by
	 * {@link StringUtils#getWordsCount}.
	 *
	 * @return the word count reported for the text returned by {@link #getText()}
	 * @throws IOException declared for callers; not thrown by the code in this method
	 */
	public long getWordCount() throws IOException {
		String text = this.getText();
		return StringUtils.getWordsCount(text);
	}
}
